import requests
import re
import codecs

# NOTE(review): the path is pinned to page 1 ("pn1") and only the ClickID
# query parameter varies between requests -- confirm whether paginating via
# the path (pn1..pn5) was the actual intent.
url = 'http://bj.58.com/dashanzi/chuzu/pn1/'

# Browser-like User-Agent sent with every request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
}

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

OUTPUT_PATH = '58output.html'

# Patterns are raw strings so that "\s" and "\n" reach the regex engine
# verbatim; they match exactly the same text as the non-raw originals.
# The leading space in ROOM_RE and SIZE_RE is part of the pattern.
TITLE_RE = re.compile(r"""tongji_label.*\n.*\n.*>\n\s*(.*?)\s*</a>""")
PIC_RE = re.compile(r"""lazy_src="(.*?)"\n""")
ROOM_RE = re.compile(r""" <p class="room">(.*?)\s&nbsp;""")
SIZE_RE = re.compile(r""" &nbsp;&nbsp;&nbsp;&nbsp;(.*?)</p>""")
MONEY_RE = re.compile(r"""<div class="money">\n.*<b>(.*?)</b>.*?\s*</div>""")

CONSOLE_ROW = "| {0:<70}| {1:<15}| {2:<10} | {3:<10} |"
HTML_ROW = "| {0:<70}| {1:<15}| {2:<10} | {3:<10} | <a href=\"{4:1}\">{5:1}</a> <br/> "


def extract_fields(html):
    """Run every field regex over *html*.

    Returns a 5-tuple of lists ``(titles, rooms, sizes, prices, pics)``.
    Each list is produced by an independent ``findall``, so the lists are
    not guaranteed to have the same length.
    """
    return (
        TITLE_RE.findall(html),
        ROOM_RE.findall(html),
        SIZE_RE.findall(html),
        MONEY_RE.findall(html),
        PIC_RE.findall(html),
    )


def render_rows(titles, rooms, sizes, prices, pics):
    """Format the extracted fields into console rows and HTML rows.

    Returns ``(console_rows, html_rows)``. Fields are paired positionally
    with ``zip``, so when the lists have different lengths the extra entries
    are dropped rather than raising ``IndexError``. Console rows do not use
    *pics*, so they are limited only by the four text fields.
    """
    console_rows = [
        CONSOLE_ROW.format(title, room, size, price)
        for title, room, size, price in zip(titles, rooms, sizes, prices)
    ]
    html_rows = [
        HTML_ROW.format(title, room, size, price, pic, pic)
        for title, room, size, price, pic in zip(titles, rooms, sizes, prices, pics)
    ]
    return console_rows, html_rows


def scrape_page(click_id):
    """Fetch the listing page with ``ClickID=click_id`` and report its rows.

    Prints the number of matches for each field, prints one formatted row
    per listing, and appends the HTML rows to ``OUTPUT_PATH``.
    """
    res = requests.get(
        url,
        params={'ClickID': click_id},
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    html = res.content.decode('utf-8')

    titles, rooms, sizes, prices, pics = extract_fields(html)
    for matches in (titles, rooms, sizes, prices, pics):
        print(len(matches))

    console_rows, html_rows = render_rows(titles, rooms, sizes, prices, pics)
    for row in console_rows:
        print(row)

    # The context manager closes the file even if a write fails.
    with codecs.open(OUTPUT_PATH, 'a', 'utf-8') as f:
        f.writelines(html_rows)


def main():
    """Scrape the listing page once for each ClickID from 1 to 5."""
    for click_id in range(1, 6):
        scrape_page(click_id)


if __name__ == '__main__':
    main()
